/***
This do-file retrieves numbers we use in the paper.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

local category "Employment"
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

*-------------------------------------------------------------------------------
**# 1. QCEW Base Employment
*-------------------------------------------------------------------------------

* Load QCEW employment counts
project, uses("${root}/data/derived/QCEW/QCEW by county x industry.dta")
use "${root}/data/derived/QCEW/QCEW by county x industry.dta", clear  

drop if inlist(industry_code, "99")
gen statefips = floor(countyfips / 1000)
drop if inlist(statefips, 72, 78)

keep if year == 2020 & month == 1 
gcollapse (sum) qcew_base = qcew_employment, by(countyfips)

tempfile qcew_jan 
save `qcew_jan'

*-------------------------------------------------------------------------------
**# 2. Quarter-to-Quarter Changes by CZ, All CZs, 2021
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
**## 2.1. Clean QCEW
*-------------------------------------------------------------------------------

* Load QCEW employment counts
project, uses("${root}/data/derived/QCEW/QCEW by county x industry.dta")
use "${root}/data/derived/QCEW/QCEW by county x industry.dta", clear  

drop if inlist(industry_code, "99")
gen statefips = floor(countyfips / 1000)
drop if inlist(statefips, 72, 78)

gen quarter = yq(year, qtr)
format %tq quarter

* By CZ 
preserve
project, uses("${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta")
project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
use "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", clear
rename countyfips county_fips 
merge 1:1 county_fips using "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", nogen keep(1 3) keepusing(pop_2014_2018_est)
rename (county_fips pop_2014_2018_est)(countyfips pop_2018)
gcollapse (sum) cz_pop = pop_2018, by(cz czname)
gsort -cz_pop
gen pop_rank = _n 

tempfile ranks
save `ranks'
restore
		
project, uses("${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta")
merge m:1 countyfips using "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", nogen keep(1 3) keepusing(cz)
merge m:1 cz using `ranks', keep(1 3) nogen

gcollapse (sum) qcew_employment, by(quarter cz czname)
		
* Get quarter-to-quarter changes
drop if mi(cz) 
sort cz quarter
gisid cz quarter 
tsset cz quarter
bys cz: gen change_qcew = 100 * (qcew_employment / L.qcew_employment - 1) if _n > 1
bys cz: replace change_qcew = 0 if quarter == tq(2020q1)
		
tempfile qcew_quarter
save `qcew_quarter'

*-------------------------------------------------------------------------------
**## 2.2. Clean Tracker
*-------------------------------------------------------------------------------

project, uses("${root}/data/web/data/Employment - County - Weekly.csv")
import delimited using "${root}/data/web/data/Employment - County - Weekly.csv", clear
drop if (year == 2021 & month >= 10)  | year > 2021                             

* Express employment changes using QCEW base 
merge m:1 countyfips using `qcew_jan', keep(match) nogen
gen employment = (emp + 1) * qcew_base

gen qtr = ceil(month / 3)
gen quarter = yq(year, qtr)
format %tq quarter

project, uses("${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta")
merge m:1 countyfips using "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", nogen keep(1 3) keepusing(cz)
merge m:1 cz using `ranks', keep(1 3) nogen

* Convert from month-level to quarter-level dataset 
gcollapse (sum) employment (first) cz_pop pop_rank, by(cz quarter czname)

* Merge QCEW 
merge 1:1 cz quarter using `qcew_quarter', keep(match) nogen

* Get quarter-to-quarter changes 
drop if mi(cz) 
sort cz quarter
gisid cz quarter 
tsset cz quarter
bys cz: gen change_paychex = 100 * (employment /  L.employment - 1) if _n > 1
bys cz: replace change_paychex = 0 if quarter == tq(2020q1)

*-------------------------------------------------------------------------------
**## 2.3. Get stats
*-------------------------------------------------------------------------------

gen squared_error = (change_paychex - change_qcew) ^ 2

* Correlation and RMSE
keep if year(dofq(quarter)) == 2021
gcollapse (mean) change_paychex change_qcew squared_error (first) cz_pop pop_rank, by(cz czname)

corr change_paychex change_qcew [w = cz_pop] 
local corr_all: di %3.2f r(rho)
sum squared_error [w = cz_pop]
local rmse_all: di %4.2f sqrt(r(mean))

* For top 50 CZs 
keep if pop_rank <= 50 
assert _N == 50

corr change_paychex change_qcew [w = cz_pop] 
local corr_top_50: di %3.2f r(rho)
sum squared_error [w = cz_pop]
local rmse_top_50: di %4.2f sqrt(r(mean))
local rmse_top_50_times2: di %4.0f ceil(sqrt(r(mean)) * 2)

*-------------------------------------------------------------------------------
**# 4. Export stats 
*-------------------------------------------------------------------------------

di `rmse_all'
di `rmse_top_50'
di `rmse_top_50_times2'

* Output numbers for paper 
cap erase "${root}/results/paper numbers/`category'/Paychex vs QCEW Quarter on Quarter.yaml"

yamlout using "${root}/results/paper numbers/`category'/Paychex vs QCEW Quarter on Quarter.yaml",  ///
	key("rmse_all") ///
	comment("All CZs, RMSE") ///
	value(`rmse_all') fmt(%9.2f)
	
yamlout using "${root}/results/paper numbers/`category'/Paychex vs QCEW Quarter on Quarter.yaml",  ///
	key("rmse_top_50") ///
	comment("Top 50 CZs, RMSE") ///
	value(`rmse_top_50') fmt(%9.2f)

yamlout using "${root}/results/paper numbers/`category'/Paychex vs QCEW Quarter on Quarter.yaml",  ///
	key("rmse_top_50_times2") ///
	comment("Top 50 CZs, RMSE * 2 rounded up to nearest integer") ///
	value(`rmse_top_50_times2') fmt(%9.0f)
	
project, creates("${root}/results/paper numbers/`category'/Paychex vs QCEW Quarter on Quarter.yaml")
